/*
* Copyright 2017 Spotify AB.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.spotify.spydra;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.dataproc.Dataproc;
import com.google.api.services.dataproc.model.Cluster;
import com.google.api.services.dataproc.model.ListClustersResponse;
import com.google.common.collect.Lists;
import com.spotify.spydra.model.ClusterType;
import com.spotify.spydra.model.SpydraArgument;
import com.spotify.spydra.submitter.api.DynamicSubmitter;
import com.spotify.spydra.submitter.api.Submitter;
import com.spotify.spydra.util.GcpUtils;
import com.spotify.spydra.util.SpydraArgumentUtil;
import org.apache.hadoop.examples.WordCount;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.URI;
import java.security.GeneralSecurityException;
import java.util.Collections;
import java.util.UUID;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * End-to-end integration test for the Spydra submission lifecycle: submits an example
 * Hadoop job through {@link DynamicSubmitter} on an ephemeral Dataproc cluster, then
 * verifies that the cluster is being collected (deleted) and that job-history files
 * were moved to the configured done-dir.
 */
public class LifecycleIT {

  private static final Logger LOGGER = LoggerFactory.getLogger(LifecycleIT.class);

  // NOTE(review): the original constant was INTERVAL = 30 * 1000 (a milliseconds-style
  // value) but it is passed to setHeartbeatIntervalSeconds(), which would have meant a
  // ~8.3-hour heartbeat. 30 seconds is the apparent intent — confirm against the
  // heartbeat consumer.
  private static final int HEARTBEAT_INTERVAL_SECONDS = 30;
  private static final String CLIENT_ID = UUID.randomUUID().toString();
  private static final GcpUtils gcpUtils = new GcpUtils();

  /**
   * Runs a full submit-and-collect cycle and asserts on the resulting cluster state
   * and job-history file placement.
   */
  @Test
  public void testLifecycle() throws Exception {
    SpydraArgument arguments = SpydraArgumentUtil.loadArguments("integration-test-config.json");
    gcpUtils.configureClusterProjectFromCredential(arguments);
    arguments.setClusterType(ClusterType.DATAPROC);
    arguments.getCluster().getOptions().put("num-workers", "3");
    arguments.getSubmit().getOptions().put(SpydraArgument.OPTION_JAR, getExamplesJarPath());
    arguments.getSubmit().setJobArgs(Lists.newArrayList("pi", "1", "1"));
    arguments.setHeartbeatIntervalSeconds(HEARTBEAT_INTERVAL_SECONDS);
    arguments.setClientId(CLIENT_ID);

    String json = gcpUtils.credentialJsonFromEnv();
    String userId = gcpUtils.userIdFromJsonCredential(json);
    arguments = SpydraArgumentUtil.mergeConfigurations(arguments, userId);
    arguments.replacePlaceholders();
    SpydraArgumentUtil.checkRequiredArguments(arguments, false, false);

    // TODO We should test the init action as well but the uploading before running the test
    // is tricky. We could upload it manually to a test bucket here and set the right things.
    arguments.getCluster().getOptions().remove(SpydraArgument.OPTION_INIT_ACTIONS);

    Submitter submitter = new DynamicSubmitter();
    assertTrue("job wasn't successful", submitter.executeJob(arguments));
    assertTrue("cluster was not collected", isClusterCollected(arguments));

    // Two files (the .jhist file and the conf) are expected in the done-dir; the
    // intermediate-done-dir must have been drained.
    URI doneUri = URI.create(arguments.clusterProperties().getProperty(
        "mapred:mapreduce.jobhistory.done-dir"));
    assertEquals(2, getFileCount(doneUri));
    URI intermediateUri = URI.create(arguments.clusterProperties().getProperty(
        "mapred:mapreduce.jobhistory.intermediate-done-dir"));
    assertEquals(0, getFileCount(intermediateUri));
  }

  /**
   * Checks whether the test cluster has been (or is being) collected.
   *
   * @return {@code true} if the cluster is in state DELETING or no longer listed at all
   * @throws IOException on Dataproc API or credential failures
   * @throws GeneralSecurityException if the trusted HTTP transport cannot be created
   */
  private boolean isClusterCollected(SpydraArgument arguments)
      throws IOException, GeneralSecurityException {
    GoogleCredential credential = GoogleCredential.fromStream(
        new ByteArrayInputStream(gcpUtils.credentialJsonFromEnv().getBytes()));
    if (credential.createScopedRequired()) {
      credential = credential.createScoped(
          Collections.singletonList("https://www.googleapis.com/auth/cloud-platform"));
    }

    HttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport();
    JsonFactory jsonFactory = JacksonFactory.getDefaultInstance();
    Dataproc dataprocService =
        new Dataproc.Builder(httpTransport, jsonFactory, credential)
            .setApplicationName("Google Cloud Platform Sample")
            .build();

    Dataproc.Projects.Regions.Clusters.List request =
        dataprocService.projects().regions().clusters().list(
            arguments.getCluster().getOptions().get(SpydraArgument.OPTION_PROJECT), "global");

    String clusterName = arguments.getCluster().getName();
    ListClustersResponse response;
    do {
      response = request.execute();
      if (response.getClusters() != null) {
        for (Cluster cluster : response.getClusters()) {
          if (cluster.getClusterName().equals(clusterName)) {
            String status = cluster.getStatus().getState();
            LOGGER.info("Cluster state is {}", status);
            return status.equals("DELETING");
          }
        }
      }
      // Always advance the page token. The original code used 'continue' when the
      // cluster list was null, which skipped this line and could re-request the same
      // page indefinitely.
      request.setPageToken(response.getNextPageToken());
    } while (response.getNextPageToken() != null);

    // Cluster no longer listed at all: it has already been fully collected.
    return true;
  }

  /**
   * Counts all files under the given URI, recursing into subdirectories.
   *
   * @throws IOException if the filesystem cannot be accessed
   */
  private int getFileCount(URI uri) throws IOException {
    FileSystem fs = gcpUtils.fileSystemForUri(uri);
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(uri), true);
    int count = 0;
    while (it.hasNext()) {
      it.next();
      count++;
    }
    return count;
  }

  /**
   * Resolves the local path of the Hadoop examples jar by locating the code source
   * of the {@link WordCount} class on the test classpath.
   */
  private String getExamplesJarPath() {
    Class<?> clazz = WordCount.class;
    return clazz.getProtectionDomain().getCodeSource().getLocation().getPath();
  }
}